{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### bz2压缩文件的操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 对于bz2压缩文件的操作\n",
    "\n",
    "import bz2\n",
    "with bz2.BZ2File(txtpath,'r') as f:\n",
    "        for line in f:\n",
    "            line = str(line, encoding = \"utf8\") #必须要加，否则\n",
    "            \n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 对于json的操作\n",
    "\n",
    "一、json是一种编码格式，并不是dict\n",
    "1. json.dump() 和 json.load() 来编码和解码JSON数据,用于处理文件   \n",
    "2. json.dumps将一个Python数据结构转换为JSON，json.loads将一个JSON编码的字符串转换回一个Python数据结构：\n",
    "3. ensure_ascii=False  用在dump的时候，中文不乱码\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# 1. 将整个数据结构写入json文件或者从json文件读出：\n",
    "\n",
    "with open('test.json','w',encoding = 'utf-8') as fw:\n",
    "    json.dump(data_dict, fw, ensure_ascii=False)\n",
    "with open('test.json','r',encoding = 'utf-8') as fr:\n",
    "    date = json.load(fr)\n",
    "    \n",
    "# 2 . 将数据结构一行行json编码写入文件，或者从文件读出来\n",
    " #一行行写入\n",
    "tmp_dict = {'question':'fasdfa','head':'fadf'}\n",
    "result_f.write(json.dumps(tmp_dict,ensure_ascii=False) + '\\n')\n",
    " # 一行行读出来\n",
    "    with open('ner_result.txt','r',encoding='utf-8') as fr:\n",
    "        for line in fr:\n",
    "            line = line.strip()\n",
    "            tmp_dict = json.loads(line)\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 文件操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "### 1 . 同时对两个文件操作\n",
    "with open('a1.txt','r',encoding='utf-8') as fr, open('a2.txt','a',encoding='utf-8') as fw:\n",
    "    for line in fr: # 读文件\n",
    "        line = line.strip() # 惯用处理，可以去掉头尾空格和换行符\n",
    "         # strip() 方法用于移除字符串头尾指定的字符（默认为空格或换行符,包括'\\n', '\\r',  '\\t',  ' ')\n",
    "        part = line.split('\\t') #  split \n",
    "        if len(part) == 2:\n",
    "            key = part[0]\n",
    "            value = part[1]\n",
    "            \n",
    "    \n",
    "# 2 . 把一个数组遍历写入文件 writelines: 可以写迭代结构， write只能写入str\n",
    "\"\"\"读+写\"\"\"\n",
    "#读文件，做处理，再写入文件\n",
    "li = []\n",
    "with open(\"./data/test\",encoding='utf-8') as fr:\n",
    "    for line in fr:\n",
    "        \n",
    "        line =line.strip()\n",
    "        li.append('1'+'\\t'+ line)#做处理 ，放到列表中\n",
    "        \n",
    "with open('data/test_xkj1','a',encoding='utf-8') as fw:\n",
    "    fw.writelines(e + '\\n' for e in li) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 字符串操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "write() argument must be str, not generator",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-20-1b5767b706dc>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mli\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'a'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'b'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'c'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'data/test_xkj2'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'a'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'utf-8'\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mfw\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[0mfw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m'\\n'\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0me\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mli\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m: write() argument must be str, not generator"
     ]
    }
   ],
   "source": [
    "# \n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 字典 dict操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_keys(['b', 'c'])\n",
      "['b', 'c']\n",
      "dict_values([2, 3])\n",
      "dict_items([('b', 2), ('c', 3)])\n"
     ]
    },
    {
     "ename": "NameError",
     "evalue": "name 'cmp' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-22-1c3739c3a2b7>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     21\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata_dict\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     22\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcmp\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata_dict\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdata_dict2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m: name 'cmp' is not defined"
     ]
    }
   ],
   "source": [
    "data_dict = {}\n",
    "data_dict2 = {\"a\":1,\"b\":2, \"c\":3}\n",
    "# 增加\n",
    "data_dict.update({\"a\":1,\"b\":2, \"c\":3})\n",
    "\n",
    "# 删除某个键值\n",
    "del data_dict[\"a\"]\n",
    "\n",
    "# 判断键值在不在\n",
    "if \"a\" in data_dict:\n",
    "    print(\"a\")\n",
    "\n",
    "# 返回key值\n",
    "print(data_dict.keys()) #不是list类型要强制转换，\n",
    "print(list(data_dict.keys()))\n",
    "\n",
    "# 返回value值\n",
    "print(data_dict.values())\n",
    "\n",
    "#  返回可遍历的元组形式\n",
    "print(data_dict.items())\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 条件判断"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# if not array\n",
    "\n",
    "# 哪些东西相当于空"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
